################################################################################
#### Title: Analyses for SI Section 4
#### Year 2024
#### Portion of the Analysis: Demographics for Senders, Receivers 1, and 
####  Receivers 2
################################################################################

# Clear global environment 
# NOTE(review): this removes every non-hidden object in the environment it is run
# from; it does not detach packages or reset options, so it is not a substitute for
# starting a fresh R session.
rm(list=ls()) 

# Set working directory 
# NOTE(review): machine-specific absolute path. Every data file below is read by
# relative name, so this line must be edited before the script is run elsewhere.
setwd("/Users/martinnaunov/Desktop/Desktop - Martin’s MacBook Pro (2)/Persuasion/2_workingdata")

# Load packages 
library(tidyverse)
library(readxl)
library(plotrix)

###########
# Study 1 #
###########

# Study 1 senders: read the working file, keep the analysis columns, and set
# the column types.
df_senders_s1 <- read.csv("senders_working2_v3.csv")

# Keep only the variables used below (column order as listed).
df_senders_s1 <- dplyr::select(
  df_senders_s1,
  edu, female, income, age,
  race, race1, race2, race3, race4, race5, race6, race7,
  pol_intr, pol_intr_full, pol_know_avg, persp_tak, nfc,
  mlengthwords, mlengthchar, time,
  env_att_strength, imm_att_strength, trans_att_strength,
  write_yes, side, StartDate, EndDate, Finished,
  partyid_7, partyid_5, repub_strength, demo_strength, party_lean,
  writing_task, respid,
  personal_rev, specific_fact, common_ground
)

# Numeric columns. `race` is coerced to numeric here and then turned into a
# factor by the next step.
df_senders_s1 <- mutate(
  df_senders_s1,
  across(
    c(
      edu, income, age,
      race, race1, race2, race3, race4, race5, race6, race7,
      pol_intr, pol_intr_full, pol_know_avg, persp_tak, nfc,
      mlengthwords, mlengthchar, time, partyid_7
    ),
    as.numeric
  )
)

# Categorical columns.
df_senders_s1 <- mutate(
  df_senders_s1,
  across(
    c(
      female, race,
      env_att_strength, imm_att_strength, trans_att_strength,
      side, Finished, personal_rev, specific_fact, common_ground
    ),
    as.factor
  )
)


# Combined senders and receivers data from Study 1: read the merged working
# file, keep the analysis columns, and set the column types.
df_senders_rec_s1 <- read.csv("working_data_final_sr1_v2.csv") %>%
  # Each column is listed once; `respondent` keeps its position after `respid`.
  dplyr::select(
    pre, post, side, quarkelt1, quarkelt2, respid, respondent,
    partyid_7, issue, affpol, att_strength, pol_intr_full, pol_intr,
    pol_know_avg, persp_tak, nfc, self_est, female, race, edu,
    income, age, mlengthwords, mlengthchar, time, common_ground,
    personal_rev, specific_fact,
    task_persuade, task_expect, movement, binary
  ) %>%
  # Numeric columns.
  mutate(
    across(
      c(
        pre, post, quarkelt1, quarkelt2, partyid_7, affpol,
        att_strength, pol_intr_full, pol_intr, pol_know_avg,
        persp_tak, nfc, self_est, income, age, mlengthwords,
        mlengthchar, time, task_persuade, edu
      ),
      as.numeric
    )
  ) %>%
  # Categorical columns (including the sender and respondent identifiers).
  mutate(
    across(
      c(
        side, respid, respondent,
        issue, female, race, common_ground,
        personal_rev, specific_fact, task_expect, movement, binary
      ),
      as.factor
    )
  )

# Receivers data from Study 1 (wide format, read without type conversion).
df_rec_s1 <- read.csv("receivers_1_working_wide_v2.csv")


###########################
# Demographics of Senders #
###########################

# Argument-check sheet, restricted to the arguments that were not excluded
# (rows where `Excluded` is missing or is anything other than 1).
df_senders_keep_s1 <- read_excel("arg_check.xlsx") %>%
  filter(is.na(Excluded) | Excluded != 1)

# Response ids of the senders whose arguments were retained.
unique_ids <- unique(df_senders_keep_s1[["ResponseId"]])

# Restrict the senders data to those respondents, i.e. the ones whose
# arguments were eligible to be used.
df_senders_s1 <- filter(df_senders_s1, respid %in% unique_ids)



# Create a race variable using the following priority order: respondents who chose Hispanic (race6) are coded as
# "Hispanic or Latino" even if they also chose another race or ethnicity; those who chose Black or African American
# (race2) are coded as such unless they also chose Hispanic; those who chose Asian (race4), American Indian, Native
# American, or Alaska Native (race3), Native Hawaiian or Pacific Islander (race5), or Arab and Middle Eastern (race7)
# are coded as "Asian or Other" unless they also chose Hispanic or Black or African American; and those who chose
# only White (race1) are coded as "White".

# Step 1: Create race, education, age, gender, and pid3 variables
# Note that for age, any respondent that selected under 18 is classified as NA.
# Build the table variables for Study 1 senders. Each variable is recoded to
# its display label and converted to a factor with a fixed level order in a
# single step; values that match no rule become NA.
df_senders_s1 <- df_senders_s1 %>%
  mutate(
    # Race/ethnicity: the first matching rule wins, so rule order is priority.
    race_tbl = factor(
      case_when(
        race6 == 1 ~ "Hispanic or Latino",
        race2 == 1 ~ "Black or African American",
        # Asian (race4), American Indian / Native American / Alaska Native
        # (race3), Native Hawaiian or Pacific Islander (race5), Arab and
        # Middle Eastern (race7)
        race4 == 1 | race3 == 1 | race5 == 1 | race7 == 1 ~ "Asian or Other",
        race1 == 1 ~ "White",
        TRUE ~ NA_character_
      ),
      levels = c(
        "White", "Black or African American",
        "Hispanic or Latino", "Asian or Other"
      )
    ),
    edu_tbl = factor(
      case_when(
        edu == 1 ~ "Less than HS",
        edu == 2 ~ "High School",
        edu == 3 ~ "Some College",
        edu == 4 ~ "College Graduate",
        edu == 5 ~ "Advanced Degree",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Less than HS", "High School", "Some College",
        "College Graduate", "Advanced Degree"
      )
    ),
    # Age codes outside 2-9 (e.g. the under-18 option) are left as NA.
    age_tbl = factor(
      case_when(
        age %in% c(2, 3) ~ "18-34",
        age %in% c(4, 5) ~ "35-54",
        age %in% c(6, 7) ~ "55-74",
        age %in% c(8, 9) ~ "75 or older",
        TRUE ~ NA_character_
      ),
      levels = c("18-34", "35-54", "55-74", "75 or older")
    ),
    female_tbl = factor(
      case_when(
        female == 1 ~ "Yes",
        female == 0 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    # Seven-point party id collapsed to three groups; 4 is the midpoint.
    pid3_tbl = factor(
      case_when(
        partyid_7 %in% c(1, 2, 3) ~ "Democrat",
        partyid_7 == 4 ~ "Independent",
        partyid_7 %in% c(5, 6, 7) ~ "Republican",
        TRUE ~ NA_character_
      ),
      levels = c("Democrat", "Republican", "Independent")
    ),
    income_tbl = factor(
      case_when(
        income == 1 ~ "Less than $10,000",
        income %in% c(2, 3, 4, 5) ~ "$10,000 - $49,999",
        income %in% c(6, 7, 8, 9, 10) ~ "$50,000 - $99,999",
        income == 11 ~ "$100,000 - $149,999",
        income == 12 ~ "More than $150,000",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Less than $10,000", "$10,000 - $49,999",
        "$50,000 - $99,999", "$100,000 - $149,999",
        "More than $150,000"
      )
    )
  )

### Table A4.1 ###

# Race ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of senders per race category (missing values dropped).
df_senders_s1 %>%
  filter(!is.na(race_tbl)) %>%
  group_by(race_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# race_tbl                      n freq 
# <fct>                     <int> <chr>
#   1 White                       298 74.5%
# 2 Black or African American    34 8.5% 
# 3 Hispanic or Latino           26 6.5% 
# 4 Asian or Other               42 10.5%

# Female ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of senders by gender (missing values dropped).
df_senders_s1 %>%
  filter(!is.na(female_tbl)) %>%
  group_by(female_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 2 × 3
# female_tbl     n freq  
# <fct>      <int> <chr> 
#   1 Yes          191 47.75%
# 2 No           209 52.25%

# Age ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of senders per age group (missing values dropped).
df_senders_s1 %>%
  filter(!is.na(age_tbl)) %>%
  group_by(age_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# age_tbl         n freq   
# <fct>       <int> <chr>  
#   1 18-34         151 38.035%
# 2 35-54         179 45.088%
# 3 55-74          64 16.121%
# 4 75 or older     3 0.756% 

# Education ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of senders per education level (missing values dropped).
df_senders_s1 %>%
  filter(!is.na(edu_tbl)) %>%
  group_by(edu_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 5 × 3
# edu_tbl              n freq   
# <fct>            <int> <chr>  
#   1 Less than HS         4 1.003% 
# 2 High School         42 10.526%
# 3 Some College       126 31.579%
# 4 College Graduate   178 44.612%
# 5 Advanced Degree     49 12.281%

# Income ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of senders per income bracket (missing values dropped).
df_senders_s1 %>%
  filter(!is.na(income_tbl)) %>%
  group_by(income_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 5 × 3
# income_tbl              n freq 
# <fct>               <int> <chr>
# 1 Less than $10,000      21 5.25%
# 2 $10,000 - $49,999     166 41.5%
# 3 $50,000 - $99,999     148 37%  
# 4 $100,000 - $149,999    38 9.5% 
# 5 More than $150,000     27 6.75%


# Partisanship ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of senders per party group (missing values dropped).
df_senders_s1 %>%
  filter(!is.na(pid3_tbl)) %>%
  group_by(pid3_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 3 × 3
# pid3_tbl        n freq  
# <fct>       <int> <chr> 
#   1 Democrat      191 47.75%
# 2 Republican    189 47.25%
# 3 Independent    20 5%   


##############################################
# Descriptive Stats for Receivers in Study 1 #
##############################################

# Create a variable that indicates how many missing pre and post-opinion values 
# each respondent has. 
# Then extract the respondent ids for respondents that have missing values for either 
# all three pre-issue opinions or all three post-issue opinions.
# Ids of respondents whose rows contain three missing `quarkelt1` values or
# three missing `quarkelt2` values. The counts are computed per respondent
# inside a grouped filter.
unique_ids2 <- df_senders_rec_s1 %>%
  group_by(respondent) %>%
  filter(sum(is.na(quarkelt1)) == 3 | sum(is.na(quarkelt2)) == 3) %>%
  distinct(respondent) %>%
  pull(respondent)

# Drop the respondents identified above, i.e. those missing either all three
# pre-issue opinions or all three post-issue opinions.
df_rec_filtered_s1 <- filter(df_rec_s1, !(respondent %in% unique_ids2))

# Sanity check: pull the dropped respondents with their pre/post opinion
# columns so they can be inspected interactively. The object is removed again
# straight away because nothing later in the script uses it.
df_rec_excluded_s1 <- select(
  filter(df_rec_s1, respondent %in% unique_ids2),
  respondent, pre_env, pre_imm, pre_trans, post_env, post_imm, post_trans
)

rm(df_rec_excluded_s1)


# Partisanship is the only demographic collected for Study 1 receivers.
# Collapse the seven-point party id into three groups and fix the level order.
df_rec_filtered_s1 <- df_rec_filtered_s1 %>%
  mutate(
    pid3_tbl = factor(
      case_when(
        partyid_7 %in% c(1, 2, 3) ~ "Democrat",
        partyid_7 == 4 ~ "Independent",
        partyid_7 %in% c(5, 6, 7) ~ "Republican",
        TRUE ~ NA_character_
      ),
      levels = c("Democrat", "Republican", "Independent")
    )
  )


### Table A4.2 ### 

# Study 1 #

# Partisanship
# Count and percentage of Study 1 receivers per party group (missing dropped).
df_rec_filtered_s1 %>%
  filter(!is.na(pid3_tbl)) %>%
  group_by(pid3_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 3 × 3
# pid3_tbl        n freq   
# <fct>       <int> <chr>  
# 1 Democrat      413 42.014%
# 2 Republican    517 52.594%
# 3 Independent    53 5.392% 


###########
# Study 2 #
###########

# Read in merged senders and receivers data from the second study, keep the
# analysis columns, and set the column types.
df_senders_rec_s2 <- read.csv("working_data_final_sr2.csv") %>%
  # Each column is listed once; `respondent` keeps its position after `respid`.
  dplyr::select(
    pre, post, side, quarkelt1, quarkelt2, respid, respondent,
    partyid_7, issue, affpol, att_strength, pol_intr_full, pol_intr,
    pol_know_avg, persp_tak, nfc, self_est, female, race, edu,
    income, age, mlengthwords, mlengthchar, time, common_ground,
    personal_rev, specific_fact,
    task_persuade, task_expect, movement, binary,
    partyid_7_rec, edu_rec, age_rec, income_rec, race_rec,
    female_main, female_alt, anchor, controlcond
  ) %>%
  # Numeric columns.
  mutate(
    across(
      c(
        pre, post, quarkelt1, quarkelt2, partyid_7, partyid_7_rec, affpol,
        att_strength, pol_intr_full, pol_intr, pol_know_avg,
        persp_tak, nfc, edu, self_est, income, age, mlengthwords,
        mlengthchar, time, task_persuade, age_rec, income_rec
      ),
      as.numeric
    )
  ) %>%
  # Categorical columns. The selection has no empty trailing argument, so it
  # does not depend on the selection helper tolerating one.
  mutate(
    across(
      c(
        side, respid, respondent,
        issue, female, race, race_rec, edu_rec, female_main, female_alt,
        common_ground, personal_rev, specific_fact, task_expect, movement,
        binary, anchor, controlcond
      ),
      as.factor
    )
  )

# Read in the receivers data from study 2 (wide format, no type conversion).
df_rec_s2 <- read.csv("receivers_2_working_wide.csv")

################################################
# Descriptive Statistics for Study 2 Receivers #
################################################

# Create a variable that indicates how many missing pre and post-opinion values 
# each respondent has. 
# Then extract the respondent ids for respondents that have missing values for either 
# all three pre-issue opinions or all three post-issue opinions.
# Ids of respondents whose rows contain three missing `quarkelt1` values or
# three missing `quarkelt2` values. The counts are computed per respondent
# inside a grouped filter.
unique_ids3 <- df_senders_rec_s2 %>%
  group_by(respondent) %>%
  filter(sum(is.na(quarkelt1)) == 3 | sum(is.na(quarkelt2)) == 3) %>%
  distinct(respondent) %>%
  pull(respondent)

# Drop those respondents, i.e. the ones missing either all three pre-issue
# opinions or all three post-issue opinions.
df_rec_filtered_s2 <- filter(df_rec_s2, !(respondent %in% unique_ids3))

### Descriptive Statistics ###

# Now use df_rec_filtered to make a descriptive statistics table.
# Make variables that match those specified in quotas requested from Cloud Research.

# First, create a race variable in which those who selected Hispanic (race3 == 1) are coded as
# "Hispanic or Latino"; those who selected Black (race2 == 1) are coded as "Black or African American"; and those who
# selected Asian (race4 == 1), American Indian, Native American, or Alaska Native (race5 == 1), Native Hawaiian or
# Pacific Islander (race6 == 1), or Other, not listed (race7 == 1) are coded as "Asian or Other". Respondents who
# selected only White (race1 == 1) are coded as "White".

# For age, combine Under 18-29, 30-59, and 60-70+ so that this matches the quotas
# indicated in Cloud Research.

# For income, we code 13, "Prefer not to Say" as NA 
# Build the table variables for Study 2 receivers. Each variable is recoded to
# its display label; all except pid3_tbl are also converted to factors with a
# fixed level order. Values that match no rule become NA.
df_rec_filtered_s2 <- df_rec_filtered_s2 %>%
  mutate(
    # Race/ethnicity: the first matching rule wins, so rule order is priority.
    race_tbl = factor(
      case_when(
        race3 == 1 ~ "Hispanic or Latino",
        race2 == 1 ~ "Black or African American",
        race4 == 1 | race5 == 1 | race6 == 1 | race7 == 1 ~ "Asian or Other",
        race1 == 1 ~ "White",
        TRUE ~ NA_character_
      ),
      levels = c(
        "White", "Black or African American",
        "Hispanic or Latino", "Asian or Other"
      )
    ),
    edu_tbl = factor(
      case_when(
        edu == 1 ~ "Less than HS",
        edu == 2 ~ "High School",
        edu == 3 ~ "Some College",
        edu == 4 ~ "College Graduate",
        edu == 5 ~ "Advanced Degree",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Less than HS", "High School", "Some College",
        "College Graduate", "Advanced Degree"
      )
    ),
    # Age codes outside 2-7 are left as NA.
    age_tbl = factor(
      case_when(
        age == 2 ~ "18-29",
        age %in% c(3, 4, 5) ~ "30-59",
        age %in% c(6, 7) ~ "60-70+",
        TRUE ~ NA_character_
      ),
      levels = c("18-29", "30-59", "60-70+")
    ),
    female_tbl = factor(
      case_when(
        female_main == 1 ~ "Yes",
        female_main == 0 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    # NOTE(review): pid3_tbl is left as a character column here, whereas the
    # Study 1 code gives it explicit factor levels. The party table below
    # therefore lists groups alphabetically. Confirm this is the desired order.
    pid3_tbl = case_when(
      partyid_7 %in% c(1, 2, 3) ~ "Democrat",
      partyid_7 == 4 ~ "Independent",
      partyid_7 %in% c(5, 6, 7) ~ "Republican",
      TRUE ~ NA_character_
    ),
    # Income codes above 12 (e.g. 13, "Prefer not to Say") are left as NA.
    income_tbl = factor(
      case_when(
        income == 1 ~ "Less than $10,000",
        income %in% c(2, 3, 4, 5) ~ "$10,000 - $49,999",
        income %in% c(6, 7, 8, 9, 10) ~ "$50,000 - $99,999",
        income == 11 ~ "$100,000 - $149,999",
        income == 12 ~ "More than $150,000",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Less than $10,000", "$10,000 - $49,999",
        "$50,000 - $99,999", "$100,000 - $149,999",
        "More than $150,000"
      )
    )
  )


### Table A4.2 ###

# Race ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of Study 2 receivers per race category (missing dropped).
df_rec_filtered_s2 %>%
  filter(!is.na(race_tbl)) %>%
  group_by(race_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 4 × 3
# race_tbl                      n freq   
# <fct>                     <int> <chr>  
# 1 White                      1709 68.115%
# 2 Black or African American   351 13.99% 
# 3 Hispanic or Latino          252 10.044%
# 4 Asian or Other              197 7.852% 

# Female ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of Study 2 receivers by gender (missing dropped).
df_rec_filtered_s2 %>%
  filter(!is.na(female_tbl)) %>%
  group_by(female_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 2 × 3
# female_tbl     n freq   
# <fct>      <int> <chr>  
# 1 Yes         1306 52.576%
# 2 No          1178 47.424%

# Age ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of Study 2 receivers per age group (missing dropped).
df_rec_filtered_s2 %>%
  filter(!is.na(age_tbl)) %>%
  group_by(age_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 3 × 3
# age_tbl     n freq   
# <fct>   <int> <chr>  
# 1 18-29     547 21.845%
# 2 30-59    1309 52.276%
# 3 60-70+    648 25.879%

# Education ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of Study 2 receivers per education level (missing dropped).
df_rec_filtered_s2 %>%
  filter(!is.na(edu_tbl)) %>%
  group_by(edu_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 5 × 3
# edu_tbl              n freq   
# <fct>            <int> <chr>  
# 1 Less than HS        14 0.558% 
# 2 High School        280 11.16% 
# 3 Some College       805 32.084%
# 4 College Graduate   967 38.541%
# 5 Advanced Degree    443 17.656%

# Income ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of Study 2 receivers per income bracket (missing dropped).
df_rec_filtered_s2 %>%
  filter(!is.na(income_tbl)) %>%
  group_by(income_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 5 × 3
# income_tbl              n freq   
# <fct>               <int> <chr>  
# 1 Less than $10,000      71 2.874% 
# 2 $10,000 - $49,999     838 33.927%
# 3 $50,000 - $99,999     978 39.595%
# 4 $100,000 - $149,999   380 15.385%
# 5 More than $150,000    203 8.219%

# Party ID ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Count and percentage of Study 2 receivers per party group (missing dropped).
df_rec_filtered_s2 %>%
  filter(!is.na(pid3_tbl)) %>%
  group_by(pid3_tbl) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(freq = paste0(round(100 * n / sum(n), 3), "%"))

# A tibble: 3 × 3
# pid3_tbl        n freq   
# <chr>       <int> <chr>  
# 1 Democrat     1507 59.66% 
# 2 Independent   283 11.203%
# 3 Republican    736 29.137%


##########################################################################
# Information for Senders Depending on Whether They Chose to Participate #
##########################################################################

# Table A.4.3: Comparisons of Senders who participated (vs. did not) in persuasion task

# Senders: re-read the full Study 1 senders file, replacing the filtered
# `df_senders_s1` built earlier in the script. Keep the analysis columns and
# set the column types.
df_senders_s1 <- read.csv("senders_working2_v3.csv")

# Keep only the variables used below (column order as listed).
df_senders_s1 <- dplyr::select(
  df_senders_s1,
  edu, female, income, age,
  race, race1, race2, race3, race4, race5, race6, race7,
  pol_intr, pol_intr_full, pol_know_avg, persp_tak, nfc,
  mlengthwords, mlengthchar, time,
  env_att_strength, imm_att_strength, trans_att_strength,
  write_yes, side, StartDate, EndDate, Finished,
  partyid_7, partyid_5, repub_strength, demo_strength, party_lean,
  writing_task, respid,
  personal_rev, specific_fact, common_ground
)

# Numeric columns. `race` is coerced to numeric here and then turned into a
# factor by the next step.
df_senders_s1 <- mutate(
  df_senders_s1,
  across(
    c(
      edu, income, age,
      race, race1, race2, race3, race4, race5, race6, race7,
      pol_intr, pol_intr_full, pol_know_avg, persp_tak, nfc,
      mlengthwords, mlengthchar, time, partyid_7
    ),
    as.numeric
  )
)

# Categorical columns.
df_senders_s1 <- mutate(
  df_senders_s1,
  across(
    c(
      female, race,
      env_att_strength, imm_att_strength, trans_att_strength,
      side, Finished, personal_rev, specific_fact, common_ground
    ),
    as.factor
  )
)

# Compare ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Classify each sender by participation in the persuasion (writing) task:
#   0  = took a side on an issue and chose to write
#   1  = took a side on an issue and chose not to write
#   2  = did not take a side, has 0 on all three attitude-strength variables
#        (i.e. expressed all neutral opinions), and did not write
#   NA = the remaining set of respondents

df_senders_s1 <- df_senders_s1 %>%
  mutate(
    compare = case_when(
      !is.na(side) & write_yes == TRUE ~ 0,
      !is.na(side) & write_yes == FALSE ~ 1,
      is.na(side) &
        (env_att_strength == 0 & imm_att_strength == 0 & trans_att_strength == 0) &
        write_yes == FALSE ~ 2,
      # NA_real_ matches the double codes above, so every branch has one type.
      TRUE ~ NA_real_
    )
  )


### Table A4.3 ###

# Age ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Mean and standard error of the age code within each `compare` group.
summarise(
  group_by(df_senders_s1, compare),
  mean_age = mean(age, na.rm = TRUE),
  se_age = plotrix::std.error(age, na.rm = TRUE)
)

# A tibble: 4 × 3
# compare mean_age se_age
# <dbl>    <dbl>  <dbl>
# 1       0     4.11 0.0621
# 2       1     3.99 0.103 
# 3       2     3.33 0.333 
# 4      NA     4.33 0.289 

# Partisanship ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Mean and standard error of seven-point party id within each `compare` group.
# The standard-error column is named for the variable it summarises.
df_senders_s1 %>% 
  group_by(compare) %>% 
  summarise(
    mean_pid = mean(partyid_7, na.rm = TRUE), 
    se_pid = plotrix::std.error(partyid_7, na.rm = TRUE)
  )

# A tibble: 4 × 3
# compare mean_pid se_pid
# <dbl>    <dbl>  <dbl>
# 1       0     3.93  0.113
# 2       1     4.01  0.187
# 3       2     2.75  0.75 
# 4      NA     3.89  0.655

# Education ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Mean and standard error of the education code within each `compare` group.
summarise(
  group_by(df_senders_s1, compare),
  mean_edu = mean(edu, na.rm = TRUE),
  se_edu = plotrix::std.error(edu, na.rm = TRUE)
)

# compare mean_edu se_edu
# <dbl>    <dbl>  <dbl>
# 1       0     3.61 0.0431
# 2       1     3.40 0.0704
# 3       2     4    0     
# 4      NA     2.56 0.242 

# Political Interest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Mean and standard error of `pol_intr_full` within each `compare` group.
summarise(
  group_by(df_senders_s1, compare),
  mean_polint = mean(pol_intr_full, na.rm = TRUE),
  se_polint = plotrix::std.error(pol_intr_full, na.rm = TRUE)
)

# A tibble: 4 × 3
#  compare mean_polint se_polint
# <dbl>       <dbl>     <dbl>
# 1       0      0.526     0.0119
# 2       1      0.497     0.0203
# 3       2      0.0625    0.0361
# 4      NA      0.298     0.0874


